"""
Goldberg Reserve Tests
======================
Two tests motivated by Goldberg & Hannaoui (2026, NY Fed SR 1087):

Test 1: Concentrated Holder Sensitivity
  - Re-run FX reserve regressions dropping China, Japan, and both
  - Tests whether the Z₁=77-84*** signal is genuine or driven by two outliers

Test 2: Reserve Adequacy Threshold
  - Split reserves into adequacy tranche (< 3 months imports proxy)
    and investment tranche (above threshold)
  - Test whether Z₁ concentrates in the investment tranche

Test 3: Reserve Accumulation Dynamics
  - First-differenced reserves (ΔFX) as dependent variable
  - Interaction with income level and safe-issuer status
  - Tests whether aging predicts reserve accumulation rates differently
    for different country types

Output: goldberg_reserve_analysis.md (standalone document)
"""

import pandas as pd
import numpy as np
from pathlib import Path
import sys
import warnings
warnings.filterwarnings('ignore')

PROJECT_DIR = Path("/mnt/c/demographics_capital_flows/net_gross")
ROOT_DIR = PROJECT_DIR.parent
sys.path.insert(0, str(ROOT_DIR / "multilateral" / "src"))
from model import PanelGLS

DATA = PROJECT_DIR / "data" / "processed"
OUT = PROJECT_DIR / "output"
OUT.mkdir(parents=True, exist_ok=True)

OECD = [
    'AUS', 'AUT', 'BEL', 'CAN', 'CHL', 'COL', 'CRI', 'CZE', 'DNK', 'EST',
    'FIN', 'FRA', 'DEU', 'GRC', 'HUN', 'ISL', 'IRL', 'ISR', 'ITA', 'JPN',
    'KOR', 'LVA', 'LTU', 'LUX', 'MEX', 'NLD', 'NZL', 'NOR', 'POL', 'PRT',
    'SVK', 'SVN', 'ESP', 'SWE', 'CHE', 'TUR', 'GBR', 'USA',
]

SAFE_ISSUERS = [
    'USA', 'DEU', 'GBR', 'FRA', 'CAN', 'AUS', 'CHE', 'NLD', 'AUT', 'DNK',
    'FIN', 'NOR', 'SWE', 'SGP', 'HKG', 'LUX', 'NZL', 'BEL', 'KOR', 'TWN',
    'CZE', 'KWT', 'QAT', 'ARE',
]

DEMO_VARS = ['Z_1', 'Z_2', 'Z_3']
CONTROLS = ['fiscal_bal_gdp', 'nfa_gdp_lag', 'log_rel_opw', 'kaopen']


def stars(p):
    if p < 0.01: return '***'
    if p < 0.05: return '**'
    if p < 0.1: return '*'
    return ''


def run_gls(df, y_var, x_vars, label, silent=False):
    cols = [y_var] + x_vars + ['iso3', 'year']
    sub = df[cols].dropna()
    if len(sub) < 50:
        if not silent:
            print(f"  SKIP {label}: only {len(sub)} obs")
        return None

    gls = PanelGLS()
    gls.fit(sub[y_var].values, sub[x_vars].values,
            sub['iso3'].values, sub['year'].values)

    result = {
        'label': label, 'dep_var': y_var,
        'n_obs': gls.n_obs, 'n_countries': gls.n_countries,
        'r_squared': gls.r_squared, 'rho': gls.rho,
    }
    for i, name in enumerate(x_vars):
        result[f'{name}_coef'] = gls.beta[i]
        result[f'{name}_se'] = gls.se[i]
        result[f'{name}_p'] = gls.pvalues[i]

    if not silent:
        print(f"  {label}: N={gls.n_obs}, {gls.n_countries} countries, R²={gls.r_squared:.4f}")
        for v in DEMO_VARS:
            if f'{v}_coef' in result:
                print(f"    {v}: {result[f'{v}_coef']:.2f}{stars(result[f'{v}_p'])} "
                      f"(se={result[f'{v}_se']:.2f}, p={result[f'{v}_p']:.4f})")

    return result


def main():
    print("=" * 70)
    print("GOLDBERG RESERVE TESTS")
    print("Motivated by Goldberg & Hannaoui (2026, NY Fed SR 1087)")
    print("=" * 70)

    df = pd.read_csv(DATA / "net_gross_panel.csv")
    df = df[df['year'] <= 2024].copy()
    print(f"Panel: {len(df):,} obs, {df['iso3'].nunique()} countries")

    controls = [c for c in CONTROLS if c in df.columns and df[c].notna().sum() > 200]
    base_vars = DEMO_VARS + controls

    # Check China/Japan data
    for iso in ['CHN', 'JPN']:
        cdf = df[(df['iso3'] == iso) & df['fx_reserves_gdp'].notna()]
        if len(cdf) > 0:
            print(f"  {iso}: {len(cdf)} obs, FX reserves mean={cdf['fx_reserves_gdp'].mean():.1f}% GDP, "
                  f"max={cdf['fx_reserves_gdp'].max():.1f}%")

    all_results = []
    doc_lines = []

    # ══════════════════════════════════════════════════════════════════
    # TEST 1: CONCENTRATED HOLDER SENSITIVITY
    # ══════════════════════════════════════════════════════════════════
    print("\n" + "=" * 60)
    print("TEST 1: CONCENTRATED HOLDER SENSITIVITY")
    print("=" * 60)
    doc_lines.append("# Goldberg Reserve Tests")
    doc_lines.append("")
    doc_lines.append("*Motivated by Goldberg & Hannaoui (2026, NY Fed SR 1087). "
                     "Tests whether the demographic signal on FX reserves (Z₁=77-84*** "
                     "in the net/gross paper) is robust to dropping concentrated holders "
                     "and whether it operates differently across reserve adequacy thresholds.*")
    doc_lines.append("")
    doc_lines.append("## Test 1: Concentrated Holder Sensitivity")
    doc_lines.append("")
    doc_lines.append("Goldberg & Hannaoui show that China and Japan dominate aggregate "
                     "reserve patterns. If our Z₁ signal is driven by these two countries' "
                     "unique demographics (Japan: oldest; China: rapid aging + massive reserves), "
                     "the finding is not genuinely cross-sectional.")
    doc_lines.append("")

    # Baseline
    print("\n  [Baseline — full sample]")
    r_base = run_gls(df, 'fx_reserves_gdp', base_vars, 'Baseline (full)')
    all_results.append(r_base)

    # Drop China
    print("\n  [Drop China]")
    df_no_chn = df[df['iso3'] != 'CHN'].copy()
    r_no_chn = run_gls(df_no_chn, 'fx_reserves_gdp', base_vars, 'Excl China')
    all_results.append(r_no_chn)

    # Drop Japan
    print("\n  [Drop Japan]")
    df_no_jpn = df[df['iso3'] != 'JPN'].copy()
    r_no_jpn = run_gls(df_no_jpn, 'fx_reserves_gdp', base_vars, 'Excl Japan')
    all_results.append(r_no_jpn)

    # Drop both
    print("\n  [Drop China + Japan]")
    df_no_both = df[~df['iso3'].isin(['CHN', 'JPN'])].copy()
    r_no_both = run_gls(df_no_both, 'fx_reserves_gdp', base_vars, 'Excl CHN+JPN')
    all_results.append(r_no_both)

    # Drop top 5 reserve holders
    print("\n  [Drop top 5 reserve holders by mean]")
    mean_reserves = df.groupby('iso3')['fx_reserves_gdp'].mean().sort_values(ascending=False)
    top5 = mean_reserves.head(5).index.tolist()
    print(f"    Top 5 by mean reserves/GDP: {top5}")
    df_no_top5 = df[~df['iso3'].isin(top5)].copy()
    r_no_top5 = run_gls(df_no_top5, 'fx_reserves_gdp', base_vars, f'Excl top5 ({",".join(top5)})')
    all_results.append(r_no_top5)

    # OECD only
    print("\n  [OECD only]")
    df_oecd = df[df['iso3'].isin(OECD)].copy()
    r_oecd = run_gls(df_oecd, 'fx_reserves_gdp', base_vars, 'OECD only')
    all_results.append(r_oecd)

    # Non-OECD only
    print("\n  [Non-OECD only]")
    df_nonoecd = df[~df['iso3'].isin(OECD)].copy()
    r_nonoecd = run_gls(df_nonoecd, 'fx_reserves_gdp', base_vars, 'Non-OECD only')
    all_results.append(r_nonoecd)

    # Write table
    doc_lines.append("| Sample | Z₁ | se | p | Z₂ | p | Z₃ | p | N | Countries | R² |")
    doc_lines.append("|:--|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|")
    for r in all_results:
        if r is None:
            continue
        row = f"| {r['label']} "
        for v in DEMO_VARS:
            coef = r.get(f'{v}_coef', np.nan)
            se = r.get(f'{v}_se', np.nan)
            p = r.get(f'{v}_p', np.nan)
            if v == 'Z_1':
                row += f"| {coef:.1f}{stars(p)} | {se:.1f} | {p:.3f} "
            else:
                row += f"| {coef:.1f} | {p:.3f} "
        row += f"| {r['n_obs']} | {r['n_countries']} | {r['r_squared']:.3f} |"
        doc_lines.append(row)

    doc_lines.append("")

    # Assess
    if r_base and r_no_both:
        base_z1 = r_base['Z_1_coef']
        drop_z1 = r_no_both['Z_1_coef']
        attenuation = (1 - drop_z1/base_z1) * 100 if base_z1 != 0 else 0
        base_p = r_base['Z_1_p']
        drop_p = r_no_both['Z_1_p']
        doc_lines.append(f"**Assessment:** Dropping China and Japan changes Z₁ from "
                        f"{base_z1:.1f} (p={base_p:.4f}) to {drop_z1:.1f} (p={drop_p:.4f}), "
                        f"an attenuation of {attenuation:.0f}%. ")
        if drop_p < 0.05:
            doc_lines.append("The signal **survives** — it is genuinely cross-sectional, "
                            "not driven by two outliers.")
        elif drop_p < 0.1:
            doc_lines.append("The signal is **marginal** without China/Japan — weakened but "
                            "not eliminated, suggesting partial dependence on these outliers.")
        else:
            doc_lines.append("The signal **collapses** without China/Japan — the Z₁ effect "
                            "on reserves is driven primarily by these two countries.")
    doc_lines.append("")

    # ══════════════════════════════════════════════════════════════════
    # TEST 2: RESERVE ADEQUACY THRESHOLD
    # ══════════════════════════════════════════════════════════════════
    print("\n" + "=" * 60)
    print("TEST 2: RESERVE ADEQUACY THRESHOLD")
    print("=" * 60)
    doc_lines.append("## Test 2: Reserve Adequacy Threshold")
    doc_lines.append("")
    doc_lines.append("Goldberg & Hannaoui distinguish precautionary reserves (liquidity buffer) "
                     "from investment reserves (excess accumulation). If demographics predict "
                     "the investment tranche but not the precautionary tranche, the mechanism "
                     "is lifecycle savings → surplus recycling, not risk management.")
    doc_lines.append("")

    # Create adequacy proxy: median reserves/GDP as threshold
    # (ideal: 3 months imports, but imports data not always available)
    # Use imports if available, otherwise use percentile-based threshold
    if 'imports_gdp' in df.columns:
        df['reserve_adequacy_threshold'] = df['imports_gdp'] * 0.25  # 3 months = 25% of annual
    else:
        # Use median fx_reserves_gdp as threshold (~20-25% GDP for many countries)
        df['reserve_adequacy_threshold'] = 20.0  # 20% GDP as rough threshold

    # Above/below threshold indicator
    df['high_reserves'] = (df['fx_reserves_gdp'] > df['reserve_adequacy_threshold']).astype(int)
    high_pct = df['high_reserves'].mean()
    print(f"  High reserves (above threshold): {high_pct:.1%}")

    # Split sample
    df_low = df[df['high_reserves'] == 0].copy()
    df_high = df[df['high_reserves'] == 1].copy()
    print(f"  Low reserves: {df_low['iso3'].nunique()} countries, {len(df_low)} obs")
    print(f"  High reserves: {df_high['iso3'].nunique()} countries, {len(df_high)} obs")

    t2_results = []
    print("\n  [Low reserves (below threshold)]")
    r_low = run_gls(df_low, 'fx_reserves_gdp', base_vars, 'Below threshold')
    t2_results.append(r_low)

    print("\n  [High reserves (above threshold)]")
    r_high = run_gls(df_high, 'fx_reserves_gdp', base_vars, 'Above threshold')
    t2_results.append(r_high)

    # Interaction test
    print("\n  [Interaction: Z₁ × high_reserves]")
    df['Z_1_x_high'] = df['Z_1'] * df['high_reserves']
    interact_vars = base_vars + ['high_reserves', 'Z_1_x_high']
    r_interact = run_gls(df, 'fx_reserves_gdp', interact_vars, 'Interaction')
    t2_results.append(r_interact)

    doc_lines.append("| Sample | Z₁ | se | p | N | Countries | R² |")
    doc_lines.append("|:--|--:|--:|--:|--:|--:|--:|")
    for r in t2_results:
        if r is None:
            continue
        z1c = r.get('Z_1_coef', np.nan)
        z1s = r.get('Z_1_se', np.nan)
        z1p = r.get('Z_1_p', np.nan)
        doc_lines.append(f"| {r['label']} | {z1c:.1f}{stars(z1p)} | {z1s:.1f} | "
                        f"{z1p:.3f} | {r['n_obs']} | {r['n_countries']} | {r['r_squared']:.3f} |")

    if r_interact and 'Z_1_x_high_coef' in r_interact:
        doc_lines.append(f"| Z₁ × high_reserves interaction | "
                        f"{r_interact['Z_1_x_high_coef']:.1f}{stars(r_interact['Z_1_x_high_p'])} | "
                        f"{r_interact['Z_1_x_high_se']:.1f} | {r_interact['Z_1_x_high_p']:.3f} | | | |")

    doc_lines.append("")
    if r_low and r_high:
        doc_lines.append(f"**Assessment:** Z₁ in low-reserve regime = {r_low['Z_1_coef']:.1f} "
                        f"(p={r_low['Z_1_p']:.3f}); in high-reserve regime = {r_high['Z_1_coef']:.1f} "
                        f"(p={r_high['Z_1_p']:.3f}). ")
        if r_high['Z_1_p'] < 0.05 and r_low['Z_1_p'] > 0.1:
            doc_lines.append("Demographics predict reserve **accumulation beyond adequacy** — "
                            "consistent with lifecycle savings surplus recycling into reserves.")
        elif r_low['Z_1_p'] < 0.05 and r_high['Z_1_p'] > 0.1:
            doc_lines.append("Demographics predict the **precautionary tranche** — suggesting "
                            "risk management rather than surplus recycling.")
        else:
            doc_lines.append("Both tranches show demographic effects — the mechanism is not "
                            "cleanly separated by adequacy thresholds.")
    doc_lines.append("")

    # ══════════════════════════════════════════════════════════════════
    # TEST 3: RESERVE ACCUMULATION DYNAMICS
    # ══════════════════════════════════════════════════════════════════
    print("\n" + "=" * 60)
    print("TEST 3: RESERVE ACCUMULATION DYNAMICS")
    print("=" * 60)
    doc_lines.append("## Test 3: Reserve Accumulation Dynamics")
    doc_lines.append("")
    doc_lines.append("Do demographics predict the *change* in reserves (flow), not just the level? "
                     "And does this differ by safe-issuer status? Goldberg & Hannaoui show that "
                     "reserve composition shifts toward non-dollar assets correlate with "
                     "geopolitical/fiscal events. Demographics may be a structural determinant "
                     "of overall accumulation rates.")
    doc_lines.append("")

    # First difference
    df = df.sort_values(['iso3', 'year'])
    df['d_fx_reserves'] = df.groupby('iso3')['fx_reserves_gdp'].diff()

    # Safe issuer indicator
    df['safe_issuer'] = df['iso3'].isin(SAFE_ISSUERS).astype(int)
    df['Z_1_x_safe'] = df['Z_1'] * df['safe_issuer']

    t3_results = []

    print("\n  [ΔFX reserves — full sample]")
    r_dfx = run_gls(df, 'd_fx_reserves', base_vars, 'ΔFX reserves (full)')
    t3_results.append(r_dfx)

    print("\n  [ΔFX reserves — safe issuers]")
    df_safe = df[df['safe_issuer'] == 1].copy()
    r_dfx_safe = run_gls(df_safe, 'd_fx_reserves', base_vars, 'ΔFX reserves (safe)')
    t3_results.append(r_dfx_safe)

    print("\n  [ΔFX reserves — non-safe issuers]")
    df_nonsafe = df[df['safe_issuer'] == 0].copy()
    r_dfx_nonsafe = run_gls(df_nonsafe, 'd_fx_reserves', base_vars, 'ΔFX reserves (non-safe)')
    t3_results.append(r_dfx_nonsafe)

    print("\n  [ΔFX reserves — interaction with safe_issuer]")
    safe_interact_vars = base_vars + ['safe_issuer', 'Z_1_x_safe']
    r_dfx_interact = run_gls(df, 'd_fx_reserves', safe_interact_vars, 'ΔFX interaction')
    t3_results.append(r_dfx_interact)

    # Levels: safe vs non-safe
    print("\n  [FX reserves level — safe issuers]")
    r_lvl_safe = run_gls(df_safe, 'fx_reserves_gdp', base_vars, 'Level (safe)')
    t3_results.append(r_lvl_safe)

    print("\n  [FX reserves level — non-safe issuers]")
    r_lvl_nonsafe = run_gls(df_nonsafe, 'fx_reserves_gdp', base_vars, 'Level (non-safe)')
    t3_results.append(r_lvl_nonsafe)

    # 5-year lag
    print("\n  [FX reserves — 5yr lagged demographics]")
    lag_vars_available = True
    for v in ['Z_1_lag5', 'Z_2_lag5', 'Z_3_lag5']:
        if v not in df.columns:
            # Create them
            df[v] = df.groupby('iso3')[v.replace('_lag5', '')].shift(5)
    lag_demo = ['Z_1_lag5', 'Z_2_lag5', 'Z_3_lag5']
    lag_base = lag_demo + controls
    r_lag5 = run_gls(df, 'fx_reserves_gdp', lag_base, '5yr lag')
    t3_results.append(r_lag5)

    doc_lines.append("| Model | Z₁ | se | p | N | Countries | R² |")
    doc_lines.append("|:--|--:|--:|--:|--:|--:|--:|")
    for r in t3_results:
        if r is None:
            continue
        # Use Z_1 or Z_1_lag5 depending on model
        z1_key = 'Z_1_lag5' if 'Z_1_lag5_coef' in r else 'Z_1'
        z1c = r.get(f'{z1_key}_coef', np.nan)
        z1s = r.get(f'{z1_key}_se', np.nan)
        z1p = r.get(f'{z1_key}_p', np.nan)
        doc_lines.append(f"| {r['label']} | {z1c:.1f}{stars(z1p)} | {z1s:.1f} | "
                        f"{z1p:.3f} | {r['n_obs']} | {r['n_countries']} | {r['r_squared']:.3f} |")
    if r_dfx_interact and 'Z_1_x_safe_coef' in r_dfx_interact:
        doc_lines.append(f"| Z₁ × safe_issuer interaction | "
                        f"{r_dfx_interact['Z_1_x_safe_coef']:.1f}{stars(r_dfx_interact['Z_1_x_safe_p'])} | "
                        f"{r_dfx_interact['Z_1_x_safe_se']:.1f} | {r_dfx_interact['Z_1_x_safe_p']:.3f} | | | |")

    doc_lines.append("")

    # ══════════════════════════════════════════════════════════════════
    # TEST 4: LEAVE-ONE-OUT JACKKNIFE (top reserve holders)
    # ══════════════════════════════════════════════════════════════════
    print("\n" + "=" * 60)
    print("TEST 4: LEAVE-ONE-OUT JACKKNIFE (top 10 reserve holders)")
    print("=" * 60)
    doc_lines.append("## Test 4: Leave-One-Out Jackknife")
    doc_lines.append("")
    doc_lines.append("Drop each of the top 10 reserve holders individually to identify "
                     "which countries are driving the Z₁ signal.")
    doc_lines.append("")

    top10 = mean_reserves.head(10).index.tolist()
    print(f"  Top 10 by mean reserves/GDP: {top10}")
    print(f"  Mean reserves/GDP: {[f'{mean_reserves[c]:.1f}' for c in top10]}")

    jackknife_results = []
    for iso in top10:
        df_drop = df[df['iso3'] != iso].copy()
        r = run_gls(df_drop, 'fx_reserves_gdp', base_vars, f'Excl {iso}', silent=True)
        if r:
            z1 = r['Z_1_coef']
            p = r['Z_1_p']
            print(f"    Excl {iso}: Z₁={z1:.1f}{stars(p)} (p={p:.4f})")
            jackknife_results.append({'country': iso, 'z1': z1, 'z1_p': p,
                                      'mean_reserves': mean_reserves[iso],
                                      'n': r['n_obs']})

    doc_lines.append("| Dropped | Mean Reserves/GDP | Z₁ | p | Change from baseline |")
    doc_lines.append("|:--|--:|--:|--:|--:|")
    base_z1 = r_base['Z_1_coef'] if r_base else 0
    for jr in jackknife_results:
        change = jr['z1'] - base_z1
        doc_lines.append(f"| {jr['country']} | {jr['mean_reserves']:.1f}% | "
                        f"{jr['z1']:.1f}{stars(jr['z1_p'])} | {jr['z1_p']:.3f} | "
                        f"{change:+.1f} |")
    doc_lines.append(f"| *Baseline (all)* | *—* | *{base_z1:.1f}* | "
                    f"*{r_base['Z_1_p']:.3f}* | *—* |")

    doc_lines.append("")
    if jackknife_results:
        z1_vals = [jr['z1'] for jr in jackknife_results]
        doc_lines.append(f"**Assessment:** Z₁ ranges from {min(z1_vals):.1f} to {max(z1_vals):.1f} "
                        f"across jackknife samples (baseline: {base_z1:.1f}). ")
        max_change = max(abs(jr['z1'] - base_z1) for jr in jackknife_results)
        most_influential = max(jackknife_results, key=lambda x: abs(x['z1'] - base_z1))
        doc_lines.append(f"Most influential country: {most_influential['country']} "
                        f"(Z₁ moves {most_influential['z1'] - base_z1:+.1f} when dropped). ")
        all_sig = all(jr['z1_p'] < 0.05 for jr in jackknife_results)
        if all_sig:
            doc_lines.append("Signal remains significant (p<0.05) in all jackknife samples — "
                            "no single country drives the result.")
        else:
            loses = [jr['country'] for jr in jackknife_results if jr['z1_p'] >= 0.05]
            doc_lines.append(f"Signal loses significance when dropping: {', '.join(loses)}.")
    doc_lines.append("")

    # ══════════════════════════════════════════════════════════════════
    # SYNTHESIS
    # ══════════════════════════════════════════════════════════════════
    doc_lines.append("## Synthesis")
    doc_lines.append("")
    doc_lines.append("These tests address Goldberg & Hannaoui's (2026) observation that "
                     "aggregate reserve patterns are dominated by a few large holders. "
                     "Key findings:")
    doc_lines.append("")

    # Build synthesis from results
    findings = []
    if r_base and r_no_both:
        surv = "survives" if r_no_both['Z_1_p'] < 0.05 else "weakens" if r_no_both['Z_1_p'] < 0.1 else "collapses"
        findings.append(f"1. **China/Japan sensitivity:** The Z₁ signal {surv} when both are "
                       f"dropped ({r_base['Z_1_coef']:.1f}→{r_no_both['Z_1_coef']:.1f}, "
                       f"p={r_no_both['Z_1_p']:.3f}).")

    if r_low and r_high:
        findings.append(f"2. **Adequacy threshold:** Z₁ in low-reserve regime = "
                       f"{r_low['Z_1_coef']:.1f} (p={r_low['Z_1_p']:.3f}), "
                       f"high-reserve = {r_high['Z_1_coef']:.1f} (p={r_high['Z_1_p']:.3f}).")

    if r_dfx:
        findings.append(f"3. **Accumulation dynamics (ΔFX):** Z₁ = {r_dfx['Z_1_coef']:.1f} "
                       f"(p={r_dfx['Z_1_p']:.3f}) — demographics "
                       f"{'predict' if r_dfx['Z_1_p'] < 0.05 else 'do not predict'} "
                       f"reserve accumulation rates.")

    if r_lvl_safe and r_lvl_nonsafe:
        findings.append(f"4. **Safe vs non-safe:** Safe issuers Z₁={r_lvl_safe['Z_1_coef']:.1f} "
                       f"(p={r_lvl_safe['Z_1_p']:.3f}), non-safe Z₁={r_lvl_nonsafe['Z_1_coef']:.1f} "
                       f"(p={r_lvl_nonsafe['Z_1_p']:.3f}).")

    for f in findings:
        doc_lines.append(f)
    doc_lines.append("")

    doc_lines.append("### Connection to Goldberg & Hannaoui (2026)")
    doc_lines.append("")
    doc_lines.append("Goldberg & Hannaoui document that reserve currency composition shifts "
                     "correlate with fiscal events (TCJA, CBO deficit revisions) and geopolitical "
                     "developments. Our tests probe whether **demographic structure** — which "
                     "predicts both the fiscal trajectory (expenditure-revenue asymmetry of 3.3:1, "
                     "Paper 7) and reserve accumulation patterns — could be an omitted structural "
                     "variable in their framework. If Z₁ predicts aggregate reserves robustly "
                     "across samples, then the demographic transition may be a slow-moving "
                     "determinant of reserve demand that interacts with the currency composition "
                     "decisions Goldberg & Hannaoui analyze at higher frequency.")
    doc_lines.append("")
    doc_lines.append("### Data Limitation")
    doc_lines.append("")
    doc_lines.append("We cannot test the currency composition channel directly because our "
                     "reserve data (Lane & Milesi-Ferretti EWN) reports aggregate FX reserves "
                     "minus gold, without currency breakdown. The IMF COFER database or "
                     "Ito-McCauley currency composition estimates would be needed to test whether "
                     "demographics predict dollar share specifically. This remains a promising "
                     "extension requiring additional data acquisition.")
    doc_lines.append("")
    doc_lines.append("---")
    doc_lines.append(f"*Generated: March 7, 2026. Panel: net_gross_panel.csv "
                    f"({df['iso3'].nunique()} countries, {len(df):,} obs, 1950-2024).*")

    # Write document
    out_path = OUT / "goldberg_reserve_analysis.md"
    out_path.write_text('\n'.join(doc_lines), encoding='utf-8')
    print(f"\nSaved: {out_path}")
    print(f"Size: {out_path.stat().st_size / 1024:.0f} KB")


if __name__ == "__main__":
    main()
